From e4bced74359bffbea7da9c94ceb2858488f1fac4 Mon Sep 17 00:00:00 2001 From: Viet-Tam Luu Date: Mon, 4 Dec 2017 13:54:19 -0800 Subject: [PATCH] Better handling of UTF-8 paths (#87) * Better handling of UTF-8 paths Remove gpsbabel calls to qPrintable which destroys non-ANSI path names on Windows. Replace with QString::toUtf8() and modify low-level file handling code to use _wfopen() and other wide-char functions on Windows to support non-ASCII paths. (I did an experiment to convince myself that fopen() doesn't do UTF-8, and that _wfopen() correctly creates a file with a non-ASCII filename starting from a UTF-8 name.) Add ufopen() function as a UTF-8 wrapper for fopen(). Change inifile_init() filename argument type to QString. Leave serial port paths alone ("if your serial port path has non-ANSI characters, you're going to have a bad time"). * Fix valgrind mismatched free/malloc error. * Correctly encode output paths in native locale on non-Windows On Mac/Linux, convert output file path from internal UTF-8 to local encoding (which may be UTF-8 or something else) when creating the file. Fixes test_encoding failure. * Support Unicode paths for .gz files on Windows Use zlib's Windows-specific gzopen_w() to support Unicode paths. * Convert UTF-8 to local encoding when opening gzip file on non-Windows This should fix the test_encoding failure on Travis-CI. Also, change Windows side to manually allocating wchar_t array; although a bit less elegant, it's very localized and avoids introducing a whole new dependency on std::wstring. * Use explicit char* to QString conversion in qPrintable The implicit conversion works, but I prefer the an explicit one (if only as a reminder that we should convert all internal strings to QString). * Add comment to QString-to-wchar_t[] conversion ... since it may not be immediately obvious what that line of code does. * Fix merge conflict resolution error * Fix merge conflict (again) * Revert shape.cc * Update gbfile.cc * Update mkt_logger.cc to use QString tempfile paths. * Change ufopen() to take QString filename argument. Where converting from char*, use an explicit QString::fromUtf8() conversion rather than the implicit QString(const char*) constructor. * Use QFile::exists() and QFile::open() rather than ufopen() to verify a file exists and can be opened. * Remove redundant call to QFile::exists(). Trivial include cleanup in main.cc. * Remove superfluous QVector include. * add newline at EOF. --- bcr.cc | 2 +- defs.h | 3 +++ exif.cc | 2 +- explorist_ini.cc | 2 +- garmin_device_xml.cc | 6 ++---- gbfile.cc | 12 +++++++++--- ggv_ovl.cc | 2 +- inifile.cc | 5 +++-- inifile.h | 2 +- kml.cc | 3 ++- main.cc | 7 ++----- mtk_logger.cc | 40 +++++++++++++++++++--------------------- raymarine.cc | 2 +- util.cc | 20 +++++++++++++++++++- v900.cc | 2 +- wbt-200.cc | 2 +- 16 files changed, 67 insertions(+), 45 deletions(-) diff --git a/bcr.cc b/bcr.cc index df47edb30..274b60f44 100644 --- a/bcr.cc +++ b/bcr.cc @@ -192,7 +192,7 @@ bcr_init_radius() static void bcr_rd_init(const QString& fname) { - ini = inifile_init(qPrintable(fname), MYNAME); + ini = inifile_init(fname, MYNAME); if (ini->unicode) { cet_convert_init(CET_CHARSET_UTF8, 1); } diff --git a/defs.h b/defs.h index eec287d1f..ea2bc0ac5 100644 --- a/defs.h +++ b/defs.h @@ -931,6 +931,9 @@ void debug_mem_close(); FILE* xfopen(const char* fname, const char* type, const char* errtxt); +// Thin wrapper around fopen() that supports Unicode fname on all platforms. +FILE* ufopen(const QString& fname, const char* mode); + // OS-abstracting wrapper for getting Unicode environment variables. QString ugetenv(const char* env_var); diff --git a/exif.cc b/exif.cc index 55f5c62f3..e7f9add77 100644 --- a/exif.cc +++ b/exif.cc @@ -1408,7 +1408,7 @@ exif_wr_deinit() { exif_release_apps(); - QString tmpname = QString::fromLocal8Bit(fout->name); + QString tmpname = QString(fout->name); gbfclose(fout); if (exif_success) { diff --git a/explorist_ini.cc b/explorist_ini.cc index 5e3cabde9..8a891dec0 100644 --- a/explorist_ini.cc +++ b/explorist_ini.cc @@ -19,7 +19,7 @@ explorist_ini_try(const char* path) char* s; xasprintf(&inipath, "%s/%s", path, "APP/Atlas.ini"); - inifile = inifile_init(inipath, myname); + inifile = inifile_init(QString::fromUtf8(inipath), myname); if (!inifile) { xfree(inipath); return NULL; diff --git a/garmin_device_xml.cc b/garmin_device_xml.cc index 8621b53ce..19fa35cac 100644 --- a/garmin_device_xml.cc +++ b/garmin_device_xml.cc @@ -29,6 +29,7 @@ #include "garmin_device_xml.h" #include "xmlgeneric.h" #include +#include #include #define MYNAME "whatever" @@ -127,10 +128,7 @@ const gdx_info* gdx_read(const char* fname) { // Test file open-able before gb_open gets a chance to fatal(). - FILE* fin = fopen(fname, "r"); - - if (fin) { - fclose(fin); + if (QFile(fname).open(QIODevice::ReadOnly)) { xml_init(fname, gdx_map, NULL); xml_read(); xml_deinit(); diff --git a/gbfile.cc b/gbfile.cc index 0c9302ffe..a895ac7e3 100644 --- a/gbfile.cc +++ b/gbfile.cc @@ -84,7 +84,14 @@ gzapi_open(gbfile* self, const char* mode) SET_BINARY_MODE(fd); self->handle.gz = gzdopen(fileno(fd), openmode); } else { - self->handle.gz = gzopen(self->name, openmode); +#if __WIN32__ + // On Windows, convert UTF-8 to wchar_t[] and use gzopen_w(). + QString name(self->name); + self->handle.gz = gzopen_w((const wchar_t*) name.utf16(), openmode); +#else + // On other platforms, convert to native locale (UTF-8 or other 8-bit). + self->handle.gz = gzopen(qPrintable(QString(self->name)), openmode); +#endif } if (self->handle.gz == NULL) { @@ -538,8 +545,7 @@ gbfopen(const QString filename, const char* mode, const char* module) file->fileungetc = memapi_ungetc; file->filewrite = memapi_write; } else { - /* Be careful to convert back to local8Bit for these c based APIS */ - file->name = xstrdup(qPrintable(filename)); + file->name = xstrdup(filename); file->is_pipe = (filename == "-"); /* Do we have a '.gz' extension in the filename ? */ diff --git a/ggv_ovl.cc b/ggv_ovl.cc index 4aa1ec01c..1a305650e 100644 --- a/ggv_ovl.cc +++ b/ggv_ovl.cc @@ -82,7 +82,7 @@ static OVL_COLOR_TYP color; static void ggv_ovl_rd_init(const QString& fname) { - inifile = inifile_init(qPrintable(fname), MYNAME); + inifile = inifile_init(fname, MYNAME); if (inifile->unicode) { cet_convert_init(CET_CHARSET_UTF8, 1); } diff --git a/inifile.cc b/inifile.cc index 78b182410..e5bc348a7 100644 --- a/inifile.cc +++ b/inifile.cc @@ -208,12 +208,12 @@ inifile_find_value(const inifile_t* inifile, const char* sec_name, const char* k filename == NULL: try to open global gpsbabel.ini */ inifile_t* -inifile_init(const char* filename, const char* myname) +inifile_init(const QString& filename, const char* myname) { inifile_t* result; gbfile* fin = NULL; - if (filename == NULL) { + if (filename.isEmpty()) { fin = open_gpsbabel_inifile(); if (fin == NULL) { return NULL; @@ -335,3 +335,4 @@ inifile_readint_def(const inifile_t* inifile, const char* section, const char* k return result; } } + diff --git a/inifile.h b/inifile.h index 39bae8109..8fc2bda3d 100644 --- a/inifile.h +++ b/inifile.h @@ -34,7 +34,7 @@ typedef struct inifile_s { reads inifile filename into memory myname represents the calling module */ -inifile_t* inifile_init(const char* filename, const char* myname); +inifile_t* inifile_init(const QString& filename, const char* myname); void inifile_done(inifile_t* inifile); int inifile_has_section(const inifile_t* inifile, const char* section); diff --git a/kml.cc b/kml.cc index c6a3e6b25..f8ff4935b 100644 --- a/kml.cc +++ b/kml.cc @@ -541,7 +541,8 @@ kml_wr_deinit() if (!posnfilenametmp.isEmpty()) { #if __WIN32__ - MoveFileExA(qPrintable(posnfilenametmp), qPrintable(posnfilename), + MoveFileExW((const wchar_t*) posnfilenametmp.utf16(), + (const wchar_t*) posnfilename.utf16(), MOVEFILE_REPLACE_EXISTING); #endif QFile::rename(posnfilenametmp, posnfilename); diff --git a/main.cc b/main.cc index e61e6fa7d..a7901bc33 100644 --- a/main.cc +++ b/main.cc @@ -18,13 +18,10 @@ */ #include -#include -#include #include #include #include #include -#include #include "cet.h" #include "cet_util.h" @@ -290,7 +287,7 @@ main(int argc, char* argv[]) #endif if (gpsbabel_time != 0) { /* within testo ? */ - global_opts.inifile = inifile_init(NULL, MYNAME); + global_opts.inifile = inifile_init(QString(), MYNAME); } init_vecs(); @@ -549,7 +546,7 @@ main(int argc, char* argv[]) if (optarg.isEmpty()) { /* from GUI to preserve inconsistent options */ global_opts.inifile = NULL; } else { - global_opts.inifile = inifile_init(qPrintable(optarg), MYNAME); + global_opts.inifile = inifile_init(optarg, MYNAME); } break; case 'b': diff --git a/mtk_logger.cc b/mtk_logger.cc index 15ea67ba6..ad40e4f55 100644 --- a/mtk_logger.cc +++ b/mtk_logger.cc @@ -58,6 +58,7 @@ #include "gbfile.h" /* used for csv output */ #include "gbser.h" #include +#include #include #include #include @@ -302,16 +303,10 @@ static void dbg(int l, const char* msg, ...) // // It returns a temporary C string - it's totally kludged in to replace // TEMP_DATA_BIN being string constants. -static const char* GetTempName(bool backup) { +static const QString GetTempName(bool backup) { const char kData[]= "data.bin"; const char kDataBackup[]= "data_old.bin"; - - QString t = QDir::tempPath(); - t += QDir::separator(); - t += backup ? kDataBackup : kData; - // If your temp directory isn't representable in Latin1, you're going to - // have a bad day. - return t.toLatin1(); + return QDir::tempPath() + QDir::separator() + (backup ? kDataBackup : kData); } #define TEMP_DATA_BIN GetTempName(false) #define TEMP_DATA_BIN_OLD GetTempName(true) @@ -561,22 +556,24 @@ static void mtk_read() log_enabled = 0; init_scan = 0; - dout = fopen(TEMP_DATA_BIN, "r+b"); + dout = ufopen(TEMP_DATA_BIN, "r+b"); if (dout == NULL) { - dout = fopen(TEMP_DATA_BIN, "wb"); + dout = ufopen(TEMP_DATA_BIN, "wb"); if (dout == NULL) { - fatal(MYNAME ": Can't create temporary file %s", TEMP_DATA_BIN); + fatal(MYNAME ": Can't create temporary file %s", + qPrintable(TEMP_DATA_BIN)); return; } } fseek(dout, 0L,SEEK_END); dsize = ftell(dout); if (dsize > 1024) { - dbg(1, "Temp %s file exists. with size %d\n", TEMP_DATA_BIN, dsize); + dbg(1, "Temp %s file exists. with size %d\n", qPrintable(TEMP_DATA_BIN), + dsize); dpos = 0; init_scan = 1; } - dbg(1, "Download %s -> %s\n", port, TEMP_DATA_BIN); + dbg(1, "Download %s -> %s\n", port, qPrintable(TEMP_DATA_BIN)); // check log status - is logging disabled ? do_cmd(CMD_LOG_STATUS, "PMTK182,3,7,", &fusage, 2); @@ -612,14 +609,15 @@ static void mtk_read() dbg(1, "Download %dkB from device\n", (addr_max+1) >> 10); if (dsize > addr_max) { - dbg(1, "Temp %s file (%ld) is larger than data size %d. Data erased since last download !\n", TEMP_DATA_BIN, dsize, addr_max); + dbg(1, "Temp %s file (%ld) is larger than data size %d. Data erased since last download !\n", qPrintable(TEMP_DATA_BIN), dsize, addr_max); fclose(dout); dsize = 0; init_scan = 0; - rename(TEMP_DATA_BIN, TEMP_DATA_BIN_OLD); - dout = fopen(TEMP_DATA_BIN, "wb"); + QFile::rename(TEMP_DATA_BIN, TEMP_DATA_BIN_OLD); + dout = ufopen(TEMP_DATA_BIN, "wb"); if (dout == NULL) { - fatal(MYNAME ": Can't create temporary file %s", TEMP_DATA_BIN); + fatal(MYNAME ": Can't create temporary file %s", + qPrintable(TEMP_DATA_BIN)); return; } } @@ -742,9 +740,9 @@ mtk_retry: fseek(dout, addr, SEEK_SET); if (fread(line, 1, rcvd_bsize, dout) == rcvd_bsize && memcmp(line, data, rcvd_bsize) == 0) { dpos = addr; - dbg(2, "%s same at %d\n", TEMP_DATA_BIN, addr); + dbg(2, "%s same at %d\n", qPrintable(TEMP_DATA_BIN), addr); } else { - dbg(2, "%s differs at %d\n", TEMP_DATA_BIN, addr); + dbg(2, "%s differs at %d\n", qPrintable(TEMP_DATA_BIN), addr); init_scan = 0; addr = dpos; bsize = read_bsize; @@ -943,7 +941,7 @@ static void mtk_csv_init(char* csv_fname, unsigned long bitmask) dbg(1, "Opening csv output file %s...\n", csv_fname); // can't use gbfopen here - it will fatal() if file doesn't exist - if ((cf = fopen(csv_fname, "r")) != NULL) { + if ((cf = ufopen(QString::fromUtf8(csv_fname), "r")) != NULL) { fclose(cf); warning(MYNAME ": CSV file %s already exist ! Cowardly refusing to overwrite.\n", csv_fname); return; @@ -1476,7 +1474,7 @@ static void file_init_m241(const QString& fname) static void file_init(const QString& fname) { dbg(4, "Opening file %s...\n", qPrintable(fname)); - if (fl = fopen(qPrintable(fname), "rb"), NULL == fl) { + if (fl = ufopen(fname, "rb"), NULL == fl) { fatal(MYNAME ": Can't open file '%s'\n", qPrintable(fname)); } switch (mtk_device) { diff --git a/raymarine.cc b/raymarine.cc index 0ab9673a2..6d15c54bb 100644 --- a/raymarine.cc +++ b/raymarine.cc @@ -169,7 +169,7 @@ find_symbol_num(const QString& descr) static void raymarine_rd_init(const QString& fname) { - fin = inifile_init(qPrintable(fname), MYNAME); + fin = inifile_init(fname, MYNAME); if (fin->unicode) { cet_convert_init(CET_CHARSET_UTF8, 1); } diff --git a/util.cc b/util.cc index fd567371a..a1ad01f26 100644 --- a/util.cc +++ b/util.cc @@ -248,7 +248,7 @@ xfopen(const char* fname, const char* type, const char* errtxt) if (0 == strcmp(fname, "-")) { return am_writing ? stdout : stdin; } - f = fopen(fname, type); + f = ufopen(QString::fromUtf8(fname), type); if (NULL == f) { fatal("%s cannot open '%s' for %s. Error was '%s'.\n", errtxt, fname, @@ -258,6 +258,23 @@ xfopen(const char* fname, const char* type, const char* errtxt) return f; } +/* + * Thin wrapper around fopen() that supports UTF-8 fname on all platforms. + */ +FILE* +ufopen(const QString& fname, const char* mode) +{ +#if __WIN32__ + // On Windows standard fopen() doesn't support UTF-8, so we have to convert + // to wchar_t* (UTF-16) and use the wide-char version of fopen(), _wfopen(). + return _wfopen((const wchar_t*) fname.utf16(), + (const wchar_t*) QString(mode).utf16()); +#else + // On other platforms, convert to native locale (UTF-8 or other 8-bit). + return fopen(qPrintable(fname), mode); +#endif +} + /* * OS-abstracting wrapper for getting Unicode environment variables. */ @@ -271,6 +288,7 @@ QString ugetenv(const char* env_var) { return QString::fromLocal8Bit(std::getenv(env_var)); #endif } + /* * Allocate a string using a format list with optional arguments * Returns -1 on error. diff --git a/v900.cc b/v900.cc index b66338cb0..f260ddd8e 100644 --- a/v900.cc +++ b/v900.cc @@ -166,7 +166,7 @@ v900_rd_init(const QString& fname) that will be translated to a single \n, making the line len one character shorter than on linux machines. */ - fin = fopen(qPrintable(fname),"rb"); + fin = ufopen(fname, "rb"); if (!fin) { fatal("v900: could not open '%s'.\n", qPrintable(fname)); } diff --git a/wbt-200.cc b/wbt-200.cc index c43b9a0cb..42a8a47fe 100644 --- a/wbt-200.cc +++ b/wbt-200.cc @@ -430,7 +430,7 @@ static int rd_buf(void* buf, int len) static void file_init(const QString& fname) { db(1, "Opening file...\n"); - if ((fl = fopen(qPrintable(fname), "rb")) == NULL) { + if ((fl = ufopen(fname, "rb")) == NULL) { fatal(MYNAME ": Can't open file '%s'\n", qPrintable(fname)); } } -- 2.30.2